Snapshot Policy + Snapshot Diff Tool

This tool will create an every minute snapshot policy. This will allow keeping track of every file create and/or modify. Use cases that this tool could help support include:

  • Watching for file changes to kick off a virus scan
  • Watching for the unexpected creation of files
  • Supporting a full index of the file system for search, find, etc

In [1]:
import re
import time
import pprint
from qumulo.rest_client import RestClient

rc = RestClient("qumulo.test", 8000)
rc.login("admin", "*********");

Create Snapshot Policy

Default is on the root directory, with 10 minutes of snapshots as a small buffer.


In [1]:
def create_policy_for_diff(rc, policy_name, path='/', minutes=10):
    try:
        dets = rc.fs.get_file_attr(path=path)
    except:
        print("!!! Unable to find directory: %s" % path)
        return
    policy = rc.snapshot.create_policy(
        name = policy_name,
        directory_id = dets['id'],
        schedule_info = {"creation_schedule":{
                        "frequency":"SCHEDULE_HOURLY_OR_LESS",
                        "fire_every":1,
                        "fire_every_interval":"FIRE_IN_MINUTES",
                        "window_start_hour":0,
                        "window_start_minute":0,
                        "window_end_hour":23,
                        "window_end_minute":59,
                        "on_days":["MON","TUE","WED","THU","FRI","SAT","SUN"],
                        "timezone":"America/Los_Angeles"
                        },
                       "expiration_time_to_live":"%sminutes" % minutes
                      }
    )
    print("Created policy on directory '%s': %s expires after %s" % (
                        path,
                        policy['name'], 
                        policy['schedules'][0]['expiration_time_to_live']))

Diff all snapshots in a policy

Warning: This will delete the oldest snapshots after it's done the diff!


In [17]:
def diff_snaps(rc, policy_name):
    snap_count = 2 # set up for the 1st loop
    paths = []
    while snap_count >= 2:
        all_snaps = rc.snapshot.list_snapshot_statuses()['entries']
        short_list = filter(lambda s: s['name'] == policy_name, all_snaps)
        snaps = sorted(short_list, key=lambda s: s['id'])
        if len(snaps) < 2:
            break
        print("Diff times: %s -> %s" % (snaps[0]['timestamp'][0:19], 
                                        snaps[1]['timestamp'][0:19]))
        diff = rc.snapshot.get_all_snapshot_tree_diff(snaps[1]['id'], 
                                                      snaps[0]['id'])
        for d in diff:
            for e in d['entries']:
                if e['path'][-1] == "/":
                    continue # it's a directory
                sz = None
                owner = None
                try:
                    dets = rc.fs.get_file_attr(e['path'])
                    sz = dets['size']
                    owner = dets['owner_details']['id_value']
                except:
                    pass
                if e['op'] == 'DELETE' and sz is not None:
                    continue # don't add deletes for existing files
                paths.append({'op': e['op'],
                              'path': e['path'], 
                              'size': sz, 
                              'owner': owner,
                              'snapshot_id': snaps[1]['id']})
        # delete the oldest snapshot
        rc.snapshot.delete_snapshot(snaps[0]['id'])
        snap_count = len(snaps) - 1
    return paths

In [18]:
create_policy_for_diff(rc, 'EveryMinuteForDiffs')


Created policy on directory '/': EveryMinuteForDiffs expires after 10minutes

In [19]:
diff_list = diff_snaps(rc, 'EveryMinuteForDiffs')
print("Found %s file changes." % len(diff_list))


Diff times: 2020-04-28T17:16:23 -> 2020-04-28T17:17:23
Diff times: 2020-04-28T17:17:23 -> 2020-04-28T17:24:22
Diff times: 2020-04-28T17:24:22 -> 2020-04-28T17:25:22
Found 476 file changes.

In [20]:
owners = {}
ops = {}
diffs = {}
for d in diff_list:
    owners[d['owner']] = 1
    if d['op'] not in ops:
        ops[d['op']] = 1
    ops[d['op']] += 1
    diffs[d['snapshot_id']] = 1
print("Ops: %s" % ' | '.join(["%s:%s" % (k,v) for k, v in ops.items()]))
print("Diff count: %s" % len(diffs))
print("Owner count: %s" % len(owners))


Ops: CREATE:57 | MODIFY:397 | DELETE:25
Diff count: 3
Owner count: 6

In [ ]: